clear
set more off
pause on

cap log close
log using clean-election.log, text replace

******************************************************************************************
* Purpose: 	this do file uses the raw CA local election data in order to link individual 
*			over time by name, ballot designation, and county
*			Data Source: Sac State Institute for Social Reseach 
*			http://csus-dspace.calstate.edu/handle/10211.3/210187
* input:	CEDAXData.xls
* output: 	ca-localX.dta
******************************************************************************************

**************
* Insheet data
**************

forval x=1995/2014 {
	if `x'<2011 import excel using CEDA`x'Data.xls, sheet("Candidates`x'") firstrow clear
	if `x'>=2011 & `x'<=2013 import excel using CEDA`x'Data.xlsx, sheet("Candidates`x'") firstrow clear
	if `x'>=2014 & `x'<=2014 import excel using CEDA`x'Data.xlsx, sheet("candidates`x'") firstrow clear
	desc
	summ
	save CEDA`x'Data.dta, replace
}	

****************************************
* Clean up data and pare down for merge
****************************************

forval x=1995/2014 {

	use CEDA`x'Data.dta, clear
	count
	
	*************************
	* Clean string variables
	*************************

	* Clean up names, remove leading/trailing spaces
	replace FIRST = trim(FIRST)
	replace LAST = trim(LAST)
	
	* Remove punctation from first names	
	gen x = subinstr(FIRST,`"""',"",.)
	replace x = subinstr(x,`"'"',"",.)	
	replace x = subinstr(x,`"("',"",.)
	replace x = subinstr(x,`")"',"",.)
	replace x = subinstr(x,".","",.)	
	replace FIRST = x
	drop x
	
	* Capitalize everything
	foreach var in FIRST LAST CNTYNAME {
		replace `var' = upper(`var')
	}
	
	* Want to merge on the first word in the first name (since there are often middle initials/names)
	* unless first word is just a letter, then take full string
	rename FIRST FIRST_orig
	gen FIRST=word(FIRST_orig, 1)
	replace FIRST=FIRST_orig if length(FIRST)==1
	replace FIRST = trim(FIRST)	
	
	* Eliminate internal blanks in first name if only initials
	gen x = subinstr(FIRST, " ","",.)
	replace FIRST = x if FIRST!=x & length(x)==2
	drop x
	
	* Replace first name with second/third word if initial(s) plus word
	gen x=word(FIRST, 1)
	gen y=word(FIRST, 2)
	gen z=word(FIRST, 3)	
	replace FIRST = y if FIRST!=x & length(x)==1 & length(y)!=1
	replace FIRST = z if FIRST!=x & length(x)==1 & length(y)==1	
	drop x y z	

	* Remove punctation from last names	
	replace LAST = subinstr(LAST, ",", "", .)
	replace LAST = subinstr(LAST, ".","",.)	
	replace LAST = subinstr(LAST, "'","",.)	
	replace LAST = subinstr(LAST, ")","",.)	
	replace LAST = subinstr(LAST, "(","",.)	
	replace LAST = subinstr(LAST,`"""',"",.)
	replace LAST = subinstr(LAST,"-","",.)	
	
	* Parse suffixes out of last name
	* JR SR I/II/III
	gen LAST_orig = LAST
	gen SUFFIX = ""
	foreach sfx in IV JR SR III II {
		gen x = subinstr(LAST, " `sfx'","",.)
		replace SUFFIX = "`sfx'" if LAST!=x
		replace LAST = x if LAST!=x
		drop x
	}
	replace LAST = subinstr(LAST, " MD", "", .)
	replace LAST = trim(LAST)
	
	* Eliminate internal blanks in last name 
	gen x = subinstr(LAST, " ","",.)
	list LAST x if LAST!=x
	replace LAST = x if LAST!=x
	drop x
		
	* Clean up office names
	replace OFFICE = upper(OFFICE)
	replace OFFICE = subinstr(OFFICE, "(R)", "", .)
	replace OFFICE = subinstr(OFFICE, "-", "", .)
	replace OFFICE = trim(OFFICE)
	
	* Standardize some common nicknames
	replace FIRST = "JAMES" if FIRST=="JIM"
	replace FIRST = "MICHAEL" if FIRST=="MIKE"
	replace FIRST = "THOMAS" if FIRST=="TOM"
	replace FIRST = "EDWARD" if FIRST=="ED"
	replace FIRST = "THEODORE" if FIRST=="TED"
	replace FIRST = "JOSEPH" if FIRST=="JOE"
	replace FIRST = "NATHANIEL" if FIRST=="NAT"
	replace FIRST = "CHARLES" if FIRST=="CHUCK" | FIRST=="CHARLIE"
	replace FIRST = "LAWRENCE" if FIRST=="LARRY"
	replace FIRST = "SUSAN" if FIRST=="SUE"
	replace FIRST = "DEBORAH" if FIRST=="DEBBIE"
	replace FIRST = "NICHOLAS" if FIRST=="NICK"
	replace FIRST = "DAVID" if FIRST=="DAVE"
	replace FIRST = "KENNETH" if FIRST=="KEN"
	replace FIRST = "DANIEL" if FIRST=="DAN"
	
	***************************************
	* Clean up vote totals, candidate rank
	***************************************

	* In 2011, there are 33 missing values for total votes in the excel file
	if `x'==2011 replace TOTVOTES = subinstr(TOTVOTES, ",", "",.)
	if `x'==2011 destring TOTVOTES, replace force

	* Correct the computation of total votes in a couple elections
	* Correct the number of candidates in a couple of elections
	if `x'==2008 {
		* this race has inconsistencies 
		drop if RaceID==200801317
	}
	if `x'==2011 {
		* this race has inconsistencies in the number of candidates and number of ranked candidates
		drop if RaceID==291
	}
	if `x'==2012 {
		replace TOTVOTES = 141 if RaceID==201200007
		replace TOTVOTES = 10392 if RaceID==201200613
	}
	if `x'==2013 {
		replace TOTVOTES = 17010 if RaceID==201300075
		replace TOTVOTES = 12813 if RaceID==201300333

		replace CAND = 2 if RaceID==201300279
		bys RaceID (VOTES): replace RVOTES = _n if RaceID==201300333
	}
	if `x'==2014 {
		* many inconsistencies in these races
		drop if RaceID==20140540
		drop if RaceID==20141475
		drop if RaceID==20140541

		replace TOTVOTES = 6934 if RaceID==20141699
		replace TOTVOTES = 48659 if RaceID==20140541

		replace CAND = 3 if RaceID==20141013
		replace CAND = 2 if RaceID==20141014
	}

	* Check that certain variables are constant within a given race
	foreach var in TOTVOTES CAND VOTE {
		bys RaceID: egen check = max(`var')
		assert check == `var'
		drop check 
	}
	
	*********************
	* Define variables
	*********************

	gen year`x'=1	
	if `x'<2014 rename ELECTED elected`x'
	if `x'>=2014 rename elected elected`x'
	gen occ`x' = BALDESIG 
	rename RaceID raceid`x'
	gen voteshare`x' = VOTES/TOTVOTES
	rename RVOTES rank`x' 
	rename CAND num_cand`x'
	rename VOTE vote`x'
	rename OFFICE office`x'
	rename PLACE place`x'
	rename DATE date`x'
	if `x'<2013 rename INCUMB incumbent`x'
	if `x'>=2013 rename INC incumbent`x'
	rename TERM term`x'
	rename FIRST_orig first_orig`x'
	if `x'<2014 rename RUNOFF runoff`x'
	if `x'>=2014 rename runoff runoff`x'
	rename TOTVOTES totalvotes`x'
		
	keep LAST FIRST BALDESIG CNTYNAME Multi_CO *`x' 
	
	* Compute margin of victory
	* Margin is non-missing for:
	*	(0) Elections in which there are more candidates than individuals voted into office
	*	(1) Single winner elections: two highest ranked candidates
	*	(2) Multi winner elections: last winner/first loser
	
	duplicates tag raceid`x', g(tag`x')
	
	sort raceid`x' rank`x'
		
	* Margin of victory = individual vote share - vote share of next strongest opponent (up/down)
	* Single winner elections
	bys raceid`x' term`x' (rank`x'): gen margin`x' = voteshare`x' - voteshare`x'[_n+1] if rank`x'==1 & vote`x'==1
	bys raceid`x' term`x' (rank`x'): replace margin`x' = voteshare`x' - voteshare`x'[_n-1] if rank`x'==2  & vote`x'==1
 	
 	* Multi winner elections
 	bys raceid`x' term`x' (rank`x'): replace margin`x' = voteshare`x' - voteshare`x'[_n+1] if rank`x'==vote`x' 
	bys raceid`x' term`x' (rank`x'): replace margin`x' = voteshare`x' - voteshare`x'[_n-1] if rank`x'==(vote`x'+1) 
	
	* Deal with ties
	duplicates report raceid`x' term`x' rank`x' 
	
	* Drop race if should have a certain number of winners, but doesn't due to tie
	gen flag = rank`x'==vote`x' & elected`x'==2
	bys raceid`x': egen max_flag = max(flag)
	tab flag, m
	drop if flag!=0 & flag!=.
	drop flag max_flag
	
	duplicates tag raceid`x' term`x' rank`x' , g(tag)
	assert tag==0 | tag==1 | tag==2 if (rank`x'==vote`x' | rank`x'==vote`x'+1)
	tab tag if rank`x'==2  & vote`x'==1
	tab tag if rank`x'==vote`x' 
	tab tag if rank`x'==(vote`x'+1) 
	
	assert tag==0 if rank`x'==1 & vote`x'==1
		
	* single winner, tie for runner up
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-1] if rank`x'==2  & vote`x'==1 & tag==1 & _n==2
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-2] if rank`x'==2  & vote`x'==1 & tag==1 & _n==3
	
	* multi winner, twoway tie for last winner
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n+2] if rank`x'==(vote`x'-1) & tag==1 & _n==(vote`x'-1)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n+1] if rank`x'==(vote`x'-1) & tag==1 & _n==vote`x'

	* multi winner, threeway tie for last winner
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n+3] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x'-2)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n+2] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x'-1)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n+1] if rank`x'==(vote`x'-2) & tag==2 & _n==(vote`x')

	* multi winner, twoway tie for runner up
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-1] if rank`x'==(vote`x'+1) & tag==1 & _n==(vote`x'+1)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-2] if rank`x'==(vote`x'+1) & tag==1 & _n==(vote`x'+2)

	* multi winner, threeway tie for runner up
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-1] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+1)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-2] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+2)
	bys raceid`x' term`x' (rank`x' LAST): replace margin`x' = voteshare`x' - voteshare`x'[_n-3] if rank`x'==(vote`x'+1) & tag==2 & _n==(vote`x'+3)
	
	* hand correct some ties
	if `x'==1995 {
		replace margin`x' = . if raceid`x'==199500444 
	}
	if `x'==1996 {
		replace margin`x' = .0495868 if raceid`x'==199601512 & rank`x'==7
		replace margin`x' = -.0495868 if raceid`x'==199601512 & rank`x'==9
	}
	if `x'==1997 {
		replace margin`x' = -.0482077 if raceid`x'==199700367 & rank`x'==4
	}
	if `x'==1998 {
		replace margin`x' = .1470588 if raceid`x'==199801265 & rank`x'==3
		replace margin`x' = -.1470588 if raceid`x'==199801265 & rank`x'==5
	}
	if `x'==1999 {
		* race has more winners than it is supposed to
		drop if raceid`x'==199900250
	}
	if `x'==2000 {
		replace margin`x' = .0533333 if raceid`x'==200001073 & rank`x'==3
		replace margin`x' = -.0533333 if raceid`x'==200001073 & rank`x'==5
	}
	if `x'==2003 {
		replace margin`x' = . if raceid`x'==200300500
	}
	if `x'==2004 {
		replace margin`x' = -.0280917 if raceid`x'==200401425
	}
	if `x'==2007 {
		drop if raceid`x'==200700279
	}
	if `x'==2008 {
		replace margin`x' = -.0162252 if raceid`x'==200800554 & rank`x'==2
	}
	if `x'==2009 {
		replace margin`x' = . if raceid`x'==200900447 & rank`x'==4
	}
	if `x'==2012 {
		replace margin`x' = . if raceid`x'==201201122 
		replace margin`x' = . if raceid`x'==201201306 
		replace margin`x' = . if raceid`x'==201201420 
		replace margin`x' = . if raceid`x'==201201489 
	}
	if `x'==2014 {
		replace margin`x' = -.0147059 if raceid`x'==20141427 & rank`x'==4
		replace margin`x' = .0147059 if raceid`x'==20141427 & rank`x'==2
		drop if raceid`x'==20141690
	}
	
	drop tag
	
	summ margin`x'
	assert margin`x'!=0

	********************************
	* Drop idiosyncratic races
	********************************

	* Drop multi-county races
	tab Multi_CO, m 
	drop if Multi_CO==1
	drop Multi_CO
	
	* Drop the few elections that have .5 ranks 
	gen temp = int(rank)
	gen flag = 1 if temp!=rank
	bys raceid: egen max_flag = max(flag)
	drop if max_flag==1
	drop temp flag max_flag
	
	* Drop the few elections with invalid number of individuals to vote in
	tab vote`x'
	drop if vote`x'==0
	
	* Drop the few elections where the vote share doesn't make sense given the number of candidates
	* Vote share 1, more than one candidate, all candidates win
	drop if voteshare==1 & num_cand>1 & elected==1
	
	* Drop the few elections where the reported number of candidates is more than those listed
	bys raceid term: gen n = _n
	bys raceid term: egen n_max = max(n)
	list if n_max<num_cand
	drop if n_max<num_cand 
	
	**************************************************************
	* Check that margin of victory is defined for the right folks
	**************************************************************

	duplicates tag raceid term rank, g(tag_rank)
	tab tag_rank
	
	* For single winner races with only one official candidate, fill in margin with voteshare
	replace margin = voteshare if margin==. & vote`x'==1 & num_cand`x'==1
	
	* Assert that margin is non-missing for last winners/first losers 
	assert margin!=. if vote`x'==1 & rank`x'<=2 & voteshare!=.
	assert margin!=. if vote`x'!=1 & (rank`x'==vote`x' | rank`x'==(vote`x'+1)) & num_cand`x'>=(vote`x'+1) & voteshare!=.
	
	* Assert that margin is missing for everyone else
	assert margin==. if vote`x'==1 & rank`x'>3 
	assert margin==. if vote`x'!=1 & rank`x'!=vote`x' & rank`x'!=(vote`x'+1) & tag_rank==0
	assert margin==. if vote`x'!=1 & (rank`x'==vote`x' | rank`x'==(vote`x'+1)) & num_cand`x'<(vote`x'+1)
	
	drop tag* n n_max
	
	save CEDA`x'Data-temp.dta, replace
	
	*************************************************
	* Remove duplicates within a given calendar year
	*************************************************
	
	* make some hand corrections to maintain internal consistency
	if `x'==1996 {
		replace FIRST="KENNETH" if FIRST=="KEN" & LAST=="MERCER" 
		replace FIRST="THOMAS" if FIRST=="TOM" & LAST=="TRYON" 
		replace FIRST="MARK" if FIRST=="JMARK" & LAST=="NIELSEN"
		replace LAST="WATERSON" if LAST=="WATERSTON" & FIRST=="BOB"
		replace LAST="BRINKISALEX" if LAST=="ALEX" & FIRST=="NIDA"
		replace LAST="DEBOTTARI" if LAST=="DEBOTARI" & FIRST=="LOU"
		replace LAST="GUTIERREZ" if LAST=="GUTTIERREZ" & FIRST=="STEVE"
		replace FIRST="ROBERT" if FIRST=="ROGER" & LAST=="FREBERG"
		replace CNTYNAME="SISKIYOU" if CNTYNAME=="SOLANO" & LAST=="LOWE" & FIRST=="DORIS"
		replace CNTYNAME="SISKIYOU" if CNTYNAME=="SOLANO" & LAST=="SILVA" & FIRST=="JOHN"	
		replace LAST="REILLY" if LAST=="REILLEY" & FIRST=="MICHAEL"
		replace FIRST="HAROLD" if FIRST=="PETER" & LAST=="SMITH"
		* leave William Sommers -- something up with the runoff election
		replace elected=2 if FIRST=="WILLIAM" & LAST=="SOMMERS"
		replace BALDESIG="Deputy Marshal" if BALDESIG=="Dep. Marshal" & LAST=="BAILEY" & FIRST=="FRED"
		replace BALDESIG="Teacher-Businessman" if BALDESIG=="Public School Teacher" & LAST=="BERNING" & FIRST=="CLIFFORD"	
		replace office="MARSHALL, VALLEJO/BENECIA JUD." if office=="MARSHALL, VALLEJOBENECIA JUD."	
		replace place="NEVADA COUNTY BOARD OF EDUCATION" if place=="NEVADA COUNTY BOARD OF EDUCATON"
		replace place="POINT ARENA" if place=="PT. ARENA" & LAST=="MCFARLAND"
		replace FIRST="JAMES" if FIRST=="JIM" & LAST=="RICHEIMER" & CNTYNAME=="ORANGE"
	}
	if `x'==1997 {
		replace FIRST="ANTHONY" if FIRST=="ANTHONYTONY" & LAST=="DEFELICIS"
		replace FIRST="JOHN" if FIRST=="JD" & LAST=="MARTINEZ"
	}
	if `x'==1998 {
		replace FIRST="LYNETTE" if FIRST=="LYN" & LAST=="NORFOLK"
		replace LAST="BRUNGESS" if LAST=="BRUNGRSS" & FIRST=="JULIE"
		* Gail Meyers didn't end up in the run off
		replace elected=2 if FIRST=="GAIL" & LAST=="MEYERS"
		* confirmed this one online, since it involves a gender switch
		replace FIRST="JOAN" if FIRST=="JOHN" & LAST=="DANIELSEN" 
		replace LAST="GARDNER" if FIRST=="JAMES" & LAST=="GARDINER"
		* Thomas Garrity actually came in third place
		replace elected=2 if FIRST=="THOMAS" & LAST=="GARRITY"
		replace FIRST="KEITH" if FIRST=="KIETH" & LAST=="ROSS"
		replace LAST="SYLVESTER" if LAST=="SYLWESTER" & FIRST=="LAURIE"
		replace FIRST="PATRICIA" if FIRST=="PAT" & LAST=="DANDO"
		replace office="SHERIFFMARSHALL" if office=="SHERIFFCORONER" & place=="MENDOCINO"
		replace FIRST = "JAMES" if FIRST=="JIM" & LAST=="STANTON" & CNTYNAME=="SAN DIEGO"
	}
	if `x'==1999 {
		replace FIRST="TERENCE" if FIRST=="TERRANCE" & LAST=="HALLINAN"
		replace FIRST = "JOSEPH" if FIRST=="JOESPH" & LAST=="OLENCHALK" & CNTYNAME=="CONTRA COSTA"
	}
	if `x'==2000 {
		* confirmed this one online, since it involves a gender switch
		replace FIRST="JON" if FIRST=="JONI" & LAST=="RACHFORD"	
		replace FIRST="JOHNNY" if FIRST=="JOHNNEY" & LAST=="MINARICK"		
	}
	if `x'==2002 {
		replace LAST="STANLEY" if LAST=="STANELY" & FIRST=="TRINA"
		replace FIRST="VICKY" if FIRST=="VICKI" & LAST=="CROW"
		replace FIRST="MARK" if FIRST=="MARC" & LAST=="KAPETAN"
		replace FIRST="JOSEPH" if FIRST=="JJOE" & LAST=="DEERING"
		replace LAST="DALY" if LAST=="DALT" & FIRST=="THOMAS"
		replace LAST="DOMANIS" if LAST=="DUMANIS" & FIRST=="BONNIE"
		replace FIRST="DONALD" if FIRST=="DON" & LAST=="RATZLAFF"
		replace FIRST = "JENNIFER" if FIRST=="JENNNIFER" & LAST=="MILLER" & CNTYNAME=="VENTURA"	
	}		
	if `x'==2003 {
		replace FIRST="TERENCE" if FIRST=="TERRENCE" & LAST=="HALLINAN"
		replace FIRST="FRED" if FIRST=="FREDRICK" & LAST=="CHYR"
	}
	if `x'==2004 {
		* confirmed on internet since it involves a gender switch
		replace FIRST="JOHN" if FIRST=="JOAN" & LAST=="NICOLETTI"
		replace FIRST = "JEFFREY" if FIRST=="JEFF" & LAST=="KETELSEN" & CNTYNAME=="VENTURA"		
	}
	if `x'==2005 {
		replace FIRST="MICHAEL" if FIRST=="MICHEAL" & LAST=="BERGFELD"
		replace place`x'="SHORELINE UNIFIED" if place`x'=="SHORELINE UNIFED"
	}
	if `x'==2006 {
		replace FIRST="MONICA" if LAST=="GARCIA" & raceid==200601355
		replace FIRST="JAMES" if FIRST=="JIM" & LAST=="CHOURA"		
	}
	if `x'==2007 replace FIRST="JAMES" if FIRST=="JIM" & LAST=="LOMAKO"
	if `x'==2008 {
		replace FIRST="STEPHEN" if FIRST=="STEVEN" & LAST=="SEATON"
		replace FIRST="THOMAS" if FIRST=="TOM" & LAST=="TRYON"
		replace FIRST="EDWARD" if FIRST=="CEDWARD" & LAST=="MACK"
		replace LAST="SCHOFIELD" if FIRST=="EDWARD" & LAST=="SCOFIELD"
		replace FIRST="PHILIP" if FIRST=="PHIL" & LAST=="THALHEIMER"		
	}
	if `x'==2009 {
		replace LAST="WILLIAMS" if LAST=="WILLLIAMS" & FIRST=="TRINA"
		replace FIRST="JOZEF" if FIRST=="JOSEF" & FIRST=="ESSAVI"
	}
	if `x'==2010 {
		replace FIRST="RUSSELL" if FIRST=="RUSSEL" & LAST=="THOMAS"
		replace FIRST="DANIEL" if FIRST=="WILLIAM" & LAST=="OMALLEY"
		replace LAST="AGOSTINI" if LAST=="DAGOSTINI" & FIRST=="JOHN"
		replace FIRST="MICHAEL" if FIRST=="MIKE" & LAST=="ZIMMERMAN"
		replace FIRST="DARREL" if FIRST=="DARELL" & LAST=="FONG"
		replace FIRST="MAGDALENA" if FIRST=="MAGDELENA" & LAST=="CARRASCO"
		replace FIRST="BEVERLY" if FIRST=="BEV" & LAST=="JOHNSON"
	}
	if `x'==2013 {
		replace LAST="ALVARADO" if FIRST=="TERESA" & LAST=="ALVARDO"
		* San Diego Mayoral special election occurred initially in 2013 and the runoff in 2014; keep only the runoff in 2014
		gen sd_mayor = 1 if CNTYNAME=="SAN DIEGO" & office`x'=="MAYOR"
		preserve
		keep if sd_mayor==1
		rename *2013 *2014 
		save sd_mayor, replace
		restore
		drop if sd_mayor==1
		drop sd_mayor
	}	
	if `x'==2014 {	
		replace FIRST="RACHELLE" if FIRST=="RACHEL" & LAST=="HILL" & raceid==20140120
		replace LAST="REINSON" if LAST=="RENISON" & FIRST=="JOHN" & raceid==20140150
		replace FIRST="EDUARDO" if FIRST=="EDWARD" & LAST=="MITCHELL" & raceid==20140312
		replace FIRST="JEFFREY" if FIRST=="JEFF" & LAST=="ENGEL" & raceid==20140367
		replace LAST="RINDONE" if LAST=="RIDNONE" & FIRST=="JERRY" & raceid==20141034
		replace FIRST="CHRIS" if FIRST=="CHIRS" & LAST=="CATE" & raceid==20141054
		replace LAST="HOLMAN" if LAST=="HOMAN" & FIRST=="ELBERT" & raceid==20141058
		replace LAST="GERWAL" if LAST=="GREWAL" & FIRST=="AS" & raceid==20141058
		replace FIRST="DYANE" if FIRST=="BURGOS" & LAST=="MEDINA" & raceid==20141060
		replace LAST="GAGLIARDI" if LAST=="GAGIARDI" & FIRST=="DON" & raceid==20141123
	}
	
	* Check that all individuals who are designated for a runoff actually appear twice
	duplicates tag LAST FIRST CNTYNAME, g(tag)
	if `x'==2011 {
		* miscoding -- these are ranked too low for runoff
		replace elected=2 if tag==0 & elected==3
	}
	assert tag>0 if elected==3
	
	* Individuals who run in multiple elections in the same year
	*	(1) Same date/place/office; different race: either same day runoff or different term lengths FULL/SHORT
	*	(2) Same date, different office
	* 	(3) Different date, same place/office: different day runoff
	* 	(4) Different date, different office, similar place
	*   (5) Same date, similar but different place (i.e. Tracy Joint Union High v. Tracy Unified), same office	
	*	(6) Different date, same office: short/full
	
	* If on different date, keep the earlier election (if not a runoff)
	* If on the same date, keep the election result with the better outcome (higher margin of victory/lower margin of loss)

	* For different dates: these should be flagged -- running again within the same year	

	gen todrop = .
	gen runagain = .
		
	* (1) Same date/place/office; different race
	* keep higher margin
	
	duplicates tag LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x', g(tag1)
	duplicates tag LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' raceid`x', g(tag2)
	tab tag1, m	
	bys LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' (margin`x' totalvotes`x'): gen x=_n
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' if tag1!=0 & tag2==0

	forval y=1/4 {
		replace todrop=1 if x<=`y' & tag1==`y' & tag2==0
	}
	drop x tag1 tag2
	
	* (2) Same date, different office (allow for different counties/place)
	* keep higher margin
	* if same margin, keep county later in alphabet
	
	duplicates tag LAST FIRST BALDESIG date`x', g(tag1)
	duplicates tag LAST FIRST BALDESIG date`x' office`x', g(tag2)	
	tab tag1, m	
	bys LAST FIRST BALDESIG date`x' (margin`x' CNTYNAME office`x'): gen x=_n
	list LAST FIRST BALDESIG date`x' if tag1!=0 & tag2==0

	forval y=1/4 {
		replace todrop=1 if x<=`y' & tag1==`y' & tag2==0
	}
	drop x tag1 tag2
	
	* (3) Different date, same place/office: different day runoff (don't include ballot desig)
	* make sure term is the same amount
	* keep later election
	* make sure these are runoffs
	* for those not formally designated runoffs, they are actually runoffs if second election only has two candidates
	
	duplicates tag LAST FIRST CNTYNAME place`x' office`x' term`x', g(tag1)
	duplicates tag LAST FIRST CNTYNAME place`x' office`x' term`x' date`x', g(tag2)	
	tab tag1, m	
	bys LAST FIRST CNTYNAME place`x' office`x' term`x' (date`x'): gen x=_n
	bys LAST FIRST CNTYNAME place`x' office`x' term`x': egen max=max(elected)
	disp "NOT RUNOFFS"
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' term`x' date elected if max!=3 & tag1!=0 & tag2==0

	* for those not designated runoffs, designate the elections as runoffs if 
	* 	(1) two elections for the same office occur in the same year, with the same people
	*	(2) the second election only has two candidates
	*	(3) both elections have the same term
	disp "CHANGE TO RUNOFF"
	* if second election seems like a runoff, then designate the first election outcome as such; it should be dropped
	gen temp=1 if tag1!=0 & tag2==0 & x!=1 & num_cand==2 & max!=3
	bys LAST FIRST CNTYNAME place`x' office`x' term`x' (date`x'): egen max_temp = max(temp)
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' term`x' date elected if tag1!=0 & tag2==0 & x==1 & max!=3 & max_temp==1	
	replace elected=3 if tag1!=0 & tag2==0 & x==1 & max!=3 & max_temp==1 
	drop max temp max_temp
	bys LAST FIRST CNTYNAME place`x' office`x' term`x': egen max=max(elected)	
	
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' term`x' date elected if tag1!=0 & tag2==0 & max==3	

	forval y=1/4 {
		replace todrop=1 if x<=`y' & tag1==`y' & tag2==0 & max==3
	}
	drop x tag1 tag2 max
	
	* (4) Different office, different date, similar place (based on first word): keep earlier election
	* unless already tagged, meaning election on same date, then just leave as is
	
	gen place1=word(place`x', 1)
	duplicates tag LAST FIRST BALDESIG place1, g(tag1)
	duplicates tag LAST FIRST BALDESIG place1 office`x', g(tag2)
	duplicates tag LAST FIRST BALDESIG place1 date`x', g(tag3)	
	tab tag1, m	
	bys LAST FIRST BALDESIG place1 (date`x'): gen x=_n
	bys LAST FIRST BALDESIG place1 (date`x'): egen max=max(elected)
	assert max!=3 if tag1!=0 & tag2==0 & tag3==0 
	assert max!=3 if tag1!=0 & tag2==0 & tag3==0 
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' if tag1!=0 & tag2==0 & tag3==0

	forval y=1/4 {
		replace todrop=1 if x!=1 & tag1==`y' & tag2==0 & tag3==0
		replace runagain=1 if x!=1 & tag1==`y' & tag2==0 & tag3==0
	}
	drop x tag1 tag2 tag3 place1 max	
	
	* (5) Same date, similar but different place (i.e. Tracy Joint Union High v. Tracy Unified), same office
	* keep one with higher margin
	* if same margin, keep place later in alphabet
	
	gen place1=word(place`x', 1)
	duplicates tag LAST FIRST BALDESIG place1 date`x' office`x', g(tag1)
	duplicates tag LAST FIRST BALDESIG place1 date`x' office`x' place`x', g(tag2)
	tab tag1, m	
	bys LAST FIRST BALDESIG place1 date`x' office`x' (margin`x' place`x'): gen x=_n
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' if tag1!=0 & tag2==0

	forval y=1/4 {
		replace todrop=1 if x<=`y' & tag1==`y' & tag2==0 
	}
	drop x tag1 tag2 place1	

	* (6) Different date, same office: short/full (ballot desig may change -- mostly to incumbent)
	* keep earlier election, since this isn't a runoff
	
	duplicates tag LAST FIRST CNTYNAME place`x' office`x', g(tag1)
	duplicates tag LAST FIRST CNTYNAME place`x' office`x' term, g(tag2)
	duplicates tag LAST FIRST CNTYNAME place`x' office`x' date, g(tag3)	
	tab tag1, m	
	bys LAST FIRST CNTYNAME place`x' office`x' (date`x'): gen x=_n
	bys LAST FIRST CNTYNAME place`x' office`x': egen max=max(elected)
	disp "RUNOFFS"
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' term elected if max==3 & tag1!=0 & tag2==0 & tag3==0 
	
	list LAST FIRST BALDESIG CNTYNAME place`x' office`x' date`x' term if tag1!=0 & tag2==0 & tag3==0 & max!=3

	forval y=1/4 {
		replace todrop=1 if x!=1 & tag1==`y' & tag2==0 & tag3==0 & max!=3
		replace runagain=1 if x!=1 & tag1==`y' & tag2==0 & tag3==0 & max!=3
	}
	drop x tag1 tag2 tag3 max	
	
	* Handcheck remaining duplicates
	duplicates report LAST FIRST if todrop!=1
	duplicates tag LAST FIRST if todrop!=1, g(tag1)
	list LAST FIRST BALDESIG CNTYNAME place office date elected voteshare term if tag1!=0 & tag1!=.
	* outsheet this list in order to make hand corrections
	outsheet LAST FIRST BALDESIG CNTYNAME place office date elected voteshare margin rank term if tag1!=0 & tag1!=.  ///
		using ca-`x'-handcheck.csv, comma replace

	* Incorporate handchecked changes
	global year `x'	
	do 01a-clean-election-handcheck.do

	assert todrop==1 if runagain==1
		
	duplicates report LAST FIRST if todrop!=1
	duplicates list LAST FIRST if todrop!=1	

	* Check that I'm not dropping all instances of a given name (due to qualifying for more than one category above)
	bys LAST FIRST: gen y=_n
	bys LAST FIRST: egen max = max(y)
	bys LAST FIRST: egen tot = total(todrop)
	assert tot!=max  
	
	drop y tot max
	
	preserve
	keep if todrop==1
	drop tag 
	sort LAST FIRST year`x'
	save ca-local-dropped`x', replace
	
	restore
	drop if todrop==1
	
	* drop the remaining duplicates
	* one cannot discern if they are the same person or two different people
	disp "REMAINING DUPLICATES"
	duplicates report LAST FIRST CNTYNAME
	duplicates tag LAST FIRST CNTYNAME, g(tag_dup)
	list if tag_dup!=0
	drop if tag_dup!=0
	
	drop tag* todrop runagain
	
	sort LAST FIRST CNTYNAME
	gen id`x' = _n
	order LAST FIRST CNTYNAME BALDESIG
	count
	save ca-local`x', replace
}


**************
* Clean up
**************

forval x=1995/2014 {
	rm CEDA`x'Data.dta 
	rm CEDA`x'Data-temp.dta 
}	
